Building Maps

library(tidyverse)
library(maps)
library(mapdata)
library(lubridate)
library(viridis)
library(wesanderson)
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>% 
  rename(Long = "Long_") 
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_character(),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character()
## )
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed/1000)) +
    borders("world", colour = NA, fill = "grey90") +
    theme_bw() +
    geom_point(shape = 21, color='purple', fill='purple', alpha = 0.5) +
    labs(title = 'World COVID-19 Confirmed cases',x = '', y = '',
        size="Cases (x1000))") +
    theme(legend.position = "right") +
    coord_fixed(ratio=1.5)
## Warning: Removed 54 rows containing missing values (geom_point).

daily_report <-   read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-05-2020.csv")) %>% 
  rename(Long = "Long_") %>% 
  filter(Country_Region == "US") %>% 
  filter (!Province_State %in% c("Alaska","Hawaii", "American Samoa",
                  "Puerto Rico","Northern Mariana Islands", 
                  "Virgin Islands", "Recovered", "Guam", "Grand Princess",
                  "District of Columbia", "Diamond Princess")) %>% 
  filter(Lat > 0)
## Parsed with column specification:
## cols(
##   FIPS = col_character(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character()
## )
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed/1000)) +
    borders("state", colour = "black", fill = "grey90") +
    theme_bw() +
    geom_point(shape = 21, color='purple', fill='purple', alpha = 0.5) +
    labs(title = 'COVID-19 Confirmed Cases in the US', x = '', y = '',
        size="Cases (x1000))") +
    theme(legend.position = "right") +
    coord_fixed(ratio=1.5)

mybreaks <- c(1, 100, 1000, 10000, 10000)
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed)) +
    borders("state", colour = "white", fill = "grey90") +
    geom_point(aes(x=Long, y=Lat, size=Confirmed, color=Confirmed),stroke=F, alpha=0.7) +
    scale_size_continuous(name="Cases", trans="log", range=c(1,7), 
                        breaks=mybreaks, labels = c("1-99",
                        "100-999", "1,000-9,999", "10,000-99,999", "50,000+")) +
    scale_color_viridis_c(option="viridis",name="Cases",
                        trans="log", breaks=mybreaks, labels = c("1-99",
                        "100-999", "1,000-9,999", "10,000-99,999", "50,000+"))  +
# Cleaning up the graph
  
  theme_void() + 
    guides( colour = guide_legend()) +
    labs(title = "Anisa Dhana's lagout for COVID-19 Confirmed Cases in the US'") +
    theme(
      legend.position = "bottom",
      text = element_text(color = "#22211d"),
      plot.background = element_rect(fill = "#ffffff", color = NA), 
      panel.background = element_rect(fill = "#ffffff", color = NA), 
      legend.background = element_rect(fill = "#ffffff", color = NA)
    ) +
    coord_fixed(ratio=1.5)
## Warning: Transformation introduced infinite values in discrete y-axis

## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 40 rows containing missing values (geom_point).

Mapping data to shapes

daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>% 
  rename(Long = "Long_") %>% 
  filter(Country_Region == "US") %>% 
  group_by(Province_State) %>% 
  summarize(Confirmed = sum(Confirmed)) %>% 
  mutate(Province_State = tolower(Province_State))
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_character(),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# load the US map data
us <- map_data("state")
# We need to join the us map data with our daily report to make one data frame/tibble
state_join <- left_join(us, daily_report, by = c("region" = "Province_State"))
# plot state map
ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) + 
  coord_fixed(1.3) + 
# Add data layer
  geom_polygon(data = state_join, aes(fill = Confirmed), color = "black") +
  scale_fill_gradientn(colours = 
                         wes_palette("Zissou1", 100, type = "continuous"),
                         trans = "log10") +
  labs(title = "COVID-19 Confirmed Cases in the US'")

library(RColorBrewer)
# To display only colorblind-friendly brewer palettes, specify the option colorblindFriendly = TRUE as follow:
# display.brewer.all(colorblindFriendly = TRUE)
# Get and format the covid report data
report_03_27_2020 <-   read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>% 
  rename(Long = "Long_") %>% 
  unite(Key, Admin2, Province_State, sep = ".") %>% 
  group_by(Key) %>% 
  summarize(Confirmed = sum(Confirmed)) %>% 
  mutate(Key = tolower(Key))
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_character(),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# dim(report_03_27_2020)
# get and format the map data
us <- map_data("state")
counties <- map_data("county") %>% 
  unite(Key, subregion, region, sep = ".", remove = FALSE)
# Join the 2 tibbles
state_join <- left_join(counties, report_03_27_2020, by = c("Key"))
# sum(is.na(state_join$Confirmed))
ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) + 
  coord_fixed(1.3) + 
  # Add data layer
  borders("state", colour = "black") +
  geom_polygon(data = state_join, aes(fill = Confirmed)) +
  scale_fill_gradientn(colors = brewer.pal(n = 5, name = "PuRd"),
                       breaks = c(1, 10, 100, 1000, 10000, 100000),
                       trans = "log10", na.value = "White") +
  ggtitle("Number of Confirmed Cases by US County") +
  theme_bw() 
## Warning: Transformation introduced infinite values in discrete y-axis

daily_report <-   read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>% 
  rename(Long = "Long_") %>% 
  filter(Province_State == "Massachusetts") %>% 
  group_by(Admin2) %>% 
  summarize(Confirmed = sum(Confirmed)) %>% 
  mutate(Admin2 = tolower(Admin2))
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_character(),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
us <- map_data("state")
ma_us <- subset(us, region == "massachusetts")
counties <- map_data("county")
ma_county <- subset(counties, region == "massachusetts")
state_join <- left_join(ma_county, daily_report, by = c("subregion" = "Admin2")) 
# plot state map
ggplot(data = ma_county, mapping = aes(x = long, y = lat, group = group)) + 
  coord_fixed(1.3) + 
# Add data layer
  geom_polygon(data = state_join, aes(fill = Confirmed), color = "white") +
    scale_fill_gradientn(colors = brewer.pal(n = 5, name = "BuGn"),
                         trans = "log10") +
  labs(title = "COVID-19 Confirmed Cases in Massachusetts'")

daily_report
## # A tibble: 14 x 2
##    Admin2              Confirmed
##    <chr>                   <dbl>
##  1 barnstable                283
##  2 berkshire                 213
##  3 bristol                   424
##  4 dukes and nantucket        12
##  5 essex                    1039
##  6 franklin                   85
##  7 hampden                   546
##  8 hampshire                 102
##  9 middlesex                1870
## 10 norfolk                   938
## 11 plymouth                  621
## 12 suffolk                  1896
## 13 unassigned                270
## 14 worcester                 667

Interactive graphs

library(plotly)
## Warning: package 'plotly' was built under R version 3.6.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
ggplotly(
  ggplot(data = ma_county, mapping = aes(x = long, y = lat, group = group)) + 
  coord_fixed(1.3) + 
# Add data layer
  geom_polygon(data = state_join, aes(fill = Confirmed), color = "black") +
    scale_fill_gradientn(colours = 
                         wes_palette("Zissou1", 100, type = "continuous")) +
  ggtitle("COVID-19 Cases in MA") +
# Cleaning up the graph
  labs(x=NULL, y=NULL) +
  theme(panel.border = element_blank()) +
  theme(panel.background = element_blank()) +
  theme(axis.ticks = element_blank()) +
  theme(axis.text = element_blank()) 
)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Read in the daily report
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv")) %>% 
  rename(Long = "Long_") %>% 
  group_by(Country_Region) %>% 
  summarize(Confirmed = sum(Confirmed), Deaths = sum(Deaths))
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# Read in the world map data
world <- as_tibble(map_data("world"))

# Check to see if there are differences in the naming of countries
setdiff(world$region, daily_report$Country_Region) 
##  [1] "Aruba"                               "Anguilla"                           
##  [3] "American Samoa"                      "Antarctica"                         
##  [5] "French Southern and Antarctic Lands" "Antigua"                            
##  [7] "Barbuda"                             "Saint Barthelemy"                   
##  [9] "Bermuda"                             "Ivory Coast"                        
## [11] "Democratic Republic of the Congo"    "Republic of Congo"                  
## [13] "Cook Islands"                        "Cape Verde"                         
## [15] "Curacao"                             "Cayman Islands"                     
## [17] "Czech Republic"                      "Canary Islands"                     
## [19] "Falkland Islands"                    "Reunion"                            
## [21] "Mayotte"                             "French Guiana"                      
## [23] "Martinique"                          "Guadeloupe"                         
## [25] "Faroe Islands"                       "Micronesia"                         
## [27] "UK"                                  "Guernsey"                           
## [29] "Greenland"                           "Guam"                               
## [31] "Heard Island"                        "Isle of Man"                        
## [33] "Cocos Islands"                       "Christmas Island"                   
## [35] "Chagos Archipelago"                  "Jersey"                             
## [37] "Siachen Glacier"                     "Kiribati"                           
## [39] "Nevis"                               "Saint Kitts"                        
## [41] "South Korea"                         "Saint Martin"                       
## [43] "Marshall Islands"                    "Macedonia"                          
## [45] "Myanmar"                             "Northern Mariana Islands"           
## [47] "Montserrat"                          "New Caledonia"                      
## [49] "Norfolk Island"                      "Niue"                               
## [51] "Bonaire"                             "Sint Eustatius"                     
## [53] "Saba"                                "Nauru"                              
## [55] "Pitcairn Islands"                    "Palau"                              
## [57] "Puerto Rico"                         "North Korea"                        
## [59] "Madeira Islands"                     "Azores"                             
## [61] "Palestine"                           "French Polynesia"                   
## [63] "South Sandwich Islands"              "South Georgia"                      
## [65] "Saint Helena"                        "Ascension Island"                   
## [67] "Solomon Islands"                     "Saint Pierre and Miquelon"          
## [69] "Swaziland"                           "Sint Maarten"                       
## [71] "Turks and Caicos Islands"            "Turkmenistan"                       
## [73] "Tonga"                               "Trinidad"                           
## [75] "Tobago"                              "Taiwan"                             
## [77] "USA"                                 "Vatican"                            
## [79] "Grenadines"                          "Saint Vincent"                      
## [81] "Virgin Islands"                      "Vanuatu"                            
## [83] "Wallis and Futuna"                   "Samoa"
# Many of these countries are considered states or territories in the JHU covid reports,
# but let's fix a few of them

world <- as_tibble(map_data("world")) %>% 
 mutate(region = str_replace_all(region, c("USA" = "US", "Czech Republic" = "Czechia",  
        "Ivory Coast" = "Cote d'Ivoire", "Democratic Republic of the Congo" = "Congo (Kinshasa)", 
        "Republic of Congo" = "Congo (Brazzaville)")))

# Join the covid report with the map data
country_join <- left_join(world, daily_report, by = c("region" = "Country_Region"))

# Create the graph
ggplotly(
ggplot(data = world, mapping = aes(x = long, y = lat, text = region, group = group)) + 
  coord_fixed(1.3) + 
# Add data layer
  geom_polygon(data = country_join, aes(fill = Deaths), color = "black") +
  scale_fill_gradientn(colours = 
                         wes_palette("Zissou1", 100, type = "continuous")) +
  labs(title = "COVID-19 Deaths")
)

Exercises

# Load 9/26 data
report_9_26 <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv")) %>%
  rename(Long = "Long_")
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
# Summarize counts for each country
confirmed_report_9_26 <- report_9_26%>%
  filter(Lat != "NA", Long != "NA") %>%
  group_by(Country_Region) %>%
  summarize(Confirmed = sum(Confirmed),
            Deaths = sum(Deaths),
            Lat = median(Lat),
            Long = median(Long))
## `summarise()` ungrouping output (override with `.groups` argument)
confirmed_report_9_26
## # A tibble: 186 x 5
##    Country_Region      Confirmed Deaths   Lat   Long
##    <chr>                   <dbl>  <dbl> <dbl>  <dbl>
##  1 Afghanistan             39192   1453  33.9  67.7 
##  2 Albania                 13153    375  41.2  20.2 
##  3 Algeria                 50914   1711  28.0   1.66
##  4 Andorra                  1836     53  42.5   1.52
##  5 Angola                   4672    171 -11.2  17.9 
##  6 Antigua and Barbuda        98      3  17.1 -61.8 
##  7 Argentina              702484  15543 -38.4 -63.6 
##  8 Armenia                 49072    948  40.1  45.0 
##  9 Australia               27040    872 -34.4 146.  
## 10 Austria                 42214    787  47.5  14.6 
## # ... with 176 more rows
# map the data
ggplot(data = report_9_26, mapping = aes(x = Long, y = Lat, size = Deaths/1000)) +
    borders("world", colour = NA, fill = "grey90") +
    theme_bw() +
    geom_point(shape = 21, color='red', fill='red', alpha = 0.5) +
    labs(title = 'World COVID-19 Deaths',x = '', y = '',
        size="Cases (x1000))") +
    theme(legend.position = "right") +
    coord_fixed(ratio=1.5)
## Warning: Removed 81 rows containing missing values (geom_point).

US_cases_9_26 <- report_9_26 %>%
  # Filter out non-continental US
  filter(Country_Region == "US", !(Province_State %in% c("Alaska","Hawaii", "American Samoa",
                  "Puerto Rico","Northern Mariana Islands", 
                  "Virgin Islands", "Recovered", "Guam", "Grand Princess",
                  "District of Columbia", "Diamond Princess")), Lat > 0)
mybreaks <- c(10, 100, 1000, 10000, 100000, 100000)
ggplot(US_cases_9_26, aes(x = Long, y = Lat, size = Confirmed/10000)) +
    borders("state", colour = "white", fill = "grey90") +
    geom_point(aes(x=Long, y=Lat, size=Confirmed, color=Confirmed),stroke=F, alpha=0.7) +
    scale_size_continuous(name="Cases", trans="log", range=c(1,5), 
                        breaks=mybreaks, labels = c("10-99",
                        "100-999", "1,000-9,999", "10,000-99,999", "50,000+", "100,000+")) +
    scale_color_viridis_c(option="viridis",name="Cases",
                        trans="log", breaks=mybreaks, labels = c("10-99",
                        "100-999", "1,000-9,999", "10,000-99,999", "50,000+", "100,000+"))  +
# Cleaning up the graph
  
  theme_void() + 
    guides( colour = guide_legend()) +
    labs(title = "Anisa Dhana's lagout for COVID-19 Confirmed Cases in the US'") +
    theme(
      legend.position = "bottom",
      text = element_text(color = "#22211d"),
      plot.background = element_rect(fill = "#ffffff", color = NA), 
      panel.background = element_rect(fill = "#ffffff", color = NA), 
      legend.background = element_rect(fill = "#ffffff", color = NA)
    ) +
    coord_fixed(ratio=1.5)
## Warning: Transformation introduced infinite values in discrete y-axis

## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 6 rows containing missing values (geom_point).

US_Counties_9_26 <- report_9_26 %>% 
  unite(Key, Admin2, Province_State, sep = ".") %>% 
  group_by(Key) %>% 
  summarize(Confirmed = sum(Confirmed)) %>% 
  mutate(Key = tolower(Key))
## `summarise()` ungrouping output (override with `.groups` argument)
# get and format the map data
us <- map_data("state")
counties <- map_data("county") %>% 
  unite(Key, subregion, region, sep = ".", remove = FALSE)
# Join the 2 tibbles
state_join <- left_join(counties, US_Counties_9_26, by = c("Key"))
ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) + 
  coord_fixed(1.3) + 
  # Add data layer
  borders("state", colour = "black") +
  geom_polygon(data = state_join, aes(fill = Confirmed)) +
  scale_fill_gradientn(colors = brewer.pal(n = 5, name = "RdBu"),
                       breaks = c(1, 10, 100, 1000, 10000, 100000),
                       trans = "log10", na.value = "White") +
  ggtitle("Number of Confirmed Cases by US County") +
  theme_classic() 
## Warning: Transformation introduced infinite values in discrete y-axis

florida_9_26 <- report_9_26 %>%
  filter(Province_State == "Florida") %>%
  group_by(Admin2) %>% 
  summarize(Confirmed = sum(Confirmed)) %>% 
  mutate(Admin2 = tolower(Admin2))
## `summarise()` ungrouping output (override with `.groups` argument)
us <- map_data("state")
fl_us <- subset(us, region == "florida")
counties <- map_data("county")
fl_county <- subset(counties, region == "florida")
state_join <- left_join(fl_county, florida_9_26, by = c("subregion" = "Admin2")) 
ggplotly(
  ggplot(data = fl_county, mapping = aes(x = long, y = lat, group = group)) +
    coord_fixed(1.3) +
    geom_polygon(data = state_join, aes(fill = Confirmed), color = "black") +
    scale_fill_gradientn(colors = wes_palette("FantasticFox1", 100, type = "continuous")) +
    # Cleaning up graph
    labs(title = "COVID-19 Cases in FL", x = NULL, y = NULL) +
    theme_classic() +
    theme(axis.ticks = element_blank())
)
  1. Create a report with static maps and interactive graphs that is meant to be read by others (e.g. your friends and family). Hide warnings, messages and even the code you used so that it is readable. Included references. Link to the Lab 6 report from your Github site. Submit the link to Moodle. Hide warnings, messages and even the code you used so that it is readable. Included references. Link to the Lab 6 report from your Github site. Submit the link to Moodle. I will talk more about the format on Wed.

References

(JHU CSSE 2020)

JHU CSSE. 2020. “CSSEGISandData/Covid-19.” Github Repository. https://github.com/CSSEGISandData/COVID-19.